#clean up
#Removes every object that ls() lists in the current environment, so the rest
#of the script starts without leftover objects from earlier work.
#NOTE(review): loaded packages, options and the working directory are not
#touched by this call; when the file is source()d into an interactive session
#it also deletes the caller's own objects.
rm(list=ls())

#load packages
library(readxl) #replaces XLConnect
library(foreign)
library(car)

#front matter
#All file names used below are relative to this directory.
setwd("/Volumes/MONOGAN/psrsd/anxiety2016/data")

###OPEN ENDED RESPONSE MANAGEMENT###
#Read the first worksheet of the open-ended file (first row holds the column
#names) and convert the result to a plain data.frame.
open_end_sheet <- read_excel("anes2012TS_openends.xlsx",
                             sheet = 1,
                             col_names = TRUE)
text <- as.data.frame(open_end_sheet)
#Optional interactive checks: names(text); head(text); tail(text)
#Optional narrowing to a single item (expects the raw data to be stored as text.0):
#text <- subset(text.0, select = c(caseid, candlik_likewhatdpc))

##Democratic and Republican likes and dislikes##
#The four open-ended items (Democratic likes, Democratic dislikes, Republican
#likes, Republican dislikes) are processed identically: recode non-responses,
#strip filler text, turn every delimiter into a comma, then count the
#comma-separated comments. The logic lives in two helpers so that the four
#items cannot drift apart.

#Standardize one column of open-ended responses.
#  x: vector of raw responses (one element per respondent).
#  Returns a character vector of the same length in which comments are
#  separated by commas and responses without any text are NA.
clean_open_end <- function(x) {
  #recode empty observations
  x[x %in% c("-1 Inapplicable", "-7 Refused")] <- NA

  #eliminate final statement of "no" further comment and other common problems
  #NOTE(review): the pattern is unanchored, so "no" is also removed from inside
  #longer words (any capitalization). Only the comment counts are exported
  #below, so this is kept as is; confirm before using the cleaned text itself.
  x <- gsub(pattern = "no", replacement = "", ignore.case = TRUE, x = x)

  #cut spaces, and turn various delimiters into comma delimiters
  #x <- gsub(pattern = "[.] ", replacement = ",", x = x) #Periods are used as delimiters in 2016.
  x <- gsub(pattern = " ", replacement = "", x = x)
  x <- gsub(pattern = "//", replacement = ",", x = x)
  x <- gsub(pattern = "[\\]", replacement = ",", x = x)
  x <- gsub(pattern = "-/-", replacement = ",", x = x)
  x <- gsub(pattern = ";", replacement = ",", x = x)

  #check for empty strings, turn to missing
  x[!is.na(x) & x == ""] <- NA
  #x[!is.na(x) & x == "NA"] <- NA #This is a new coding in 2016.
  x
}

#Count the comments in each cleaned response.
#  x: character vector returned by clean_open_end().
#  Returns a numeric vector of the same length; missing responses count as 0.
count_comments <- function(x) {
  pieces <- strsplit(as.character(x), split = ",")
  #Only segments that still contain text are comments. Empty segments appear
  #when a filler "no" is removed next to a delimiter (",good", "good,,bad") or
  #when a response consists of delimiters only; counting them would inflate
  #the number of comments. nzchar(NA) is TRUE, hence the explicit is.na() test.
  vapply(pieces,
         function(segment) as.numeric(sum(!is.na(segment) & nzchar(segment))),
         numeric(1))
}

#name of the new count variable = name of the open-ended text column
open_end_vars <- c(dem.like = "candlik_likewhatdpc",
                   dem.dislike = "candlik_dislwhatdpc",
                   rep.like = "candlik_likewhatrpc",
                   rep.dislike = "candlik_dislwhatrpc")

for (count_var in names(open_end_vars)) {
  text_var <- open_end_vars[[count_var]]

  #examine the raw responses: first cases and the 20 most frequent answers
  #(print() is explicit because nothing is auto-printed inside a loop)
  print(head(text[[text_var]]))
  print(head(sort(table(text[[text_var]], useNA = "always"), decreasing = TRUE), 20))

  #clean the text in place, then record the number of comments in the data
  text[[text_var]] <- clean_open_end(text[[text_var]])
  text[[count_var]] <- count_comments(text[[text_var]])

  #compare cleaned text and counts at both ends of the file
  print(head(text[, c(text_var, count_var)]))
  print(tail(text[, c(text_var, count_var)]))
}

##Create the end count file##
#Keep the respondent identifier plus the four comment counts and save them.
count_columns <- c("caseid", "dem.like", "dem.dislike", "rep.like", "rep.dislike")
text.count <- subset(text, select = count_columns)
write.csv(text.count, file = "textCount2012.csv", row.names = FALSE)


###QUANTITATIVE RESPONSE MANAGEMENT###
#load data (numeric codes are kept; value labels are not turned into factors)
anes.0 <- read.dta("anes_timeseries_2012_Stata12.dta", convert.factors = FALSE)

#subset to the variables used below
keep_vars <- c(
  "version", "caseid", "postvote_presvtwho",
  "candaff_angdpc", "candaff_angdpcoft", "candaff_hpdpc", "candaff_hpdpcoft",
  "candaff_afrdpc", "candaff_afrdpcoft", "candaff_prddpc", "candaff_prddpcoft",
  "candaff_angrpc", "candaff_angrpcoft", "candaff_hprpc", "candaff_hprpcoft",
  "candaff_afrrpc", "candaff_afrrpcoft", "candaff_prdrpc", "candaff_prdrpcoft",
  "pid_x",
  "spsrvpr_ssself", "spsrvpr_ssdpc", "spsrvpr_ssrpc",
  "defsppr_self", "defsppr_dpc", "defsppr_rpc",
  "inspre_self", "inspre_dpc", "inspre_rpc",
  "guarpr_self", "guarpr_dpc", "guarpr_rpc",
  "aidblack_self", "aidblack_dpc", "aidblack_rpc",
  "envjob_self", "envjob_dpc", "envjob_rpc",
  "candlik_likedpc", "candlik_disldpc", "candlik_likerpc", "candlik_dislrpc"
)
anes.1 <- subset(anes.0, select = keep_vars)

#merge in the open-ended comment counts by respondent
anes <- merge(x = anes.1, y = text.count, by = "caseid")

#clean vote choice variable, subset to those who voted for D (coded #1) or R (coded #2)
#also, eliminate true independents (pid_x==4)
not_two_party <- c(-9, -7, -6, -1, 5)
anes$postvote_presvtwho[anes$postvote_presvtwho %in% not_two_party] <- NA
keep_row <- !is.na(anes$postvote_presvtwho) & anes$pid_x != 4
anes <- anes[which(keep_row), , drop = FALSE]

#data cleaning, any negative number is out, as these are all missing data codes
anes[anes < 0] <- NA

#Optional checks: summary(anes); table(anes<0)

#Min-max rescaling of a numeric vector onto [0, 1], ignoring missing values.
rescale01 <- function(v) {
  lo <- min(v, na.rm = TRUE)
  hi <- max(v, na.rm = TRUE)
  (v - lo) / (hi - lo)
}

#For one issue scale: distance between the respondent's own placement and the
#"dpc" placement, minus the distance to the "rpc" placement.
#  stem: variable name up to (not including) the "self"/"dpc"/"rpc" suffix.
issue_gap <- function(stem) {
  own_position <- anes[[paste0(stem, "self")]]
  abs(anes[[paste0(stem, "dpc")]] - own_position) -
    abs(anes[[paste0(stem, "rpc")]] - own_position)
}

#voted for candidate of own party, or of Republican
#(pid_x is still on its original scale here: above 4 vs. below 4)
loyal_vote_2 <- anes$postvote_presvtwho == 2 & anes$pid_x > 4
loyal_vote_1 <- anes$postvote_presvtwho == 1 & anes$pid_x < 4
anes$own <- as.numeric(loyal_vote_2 | loyal_vote_1)
anes$vote.rep <- anes$postvote_presvtwho - 1

#rescale partisanship
anes$pid_x <- rescale01(anes$pid_x)

#issue advantage measure: sum of the gaps over all six issue scales
six_issues <- c("spsrvpr_ss", "defsppr_", "inspre_", "guarpr_", "aidblack_", "envjob_")
anes$issues <- rescale01(Reduce(`+`, lapply(six_issues, issue_gap)))

#alternate issue advantage measure for longer-term analysis (three scales only)
three_issues <- c("spsrvpr_ss", "defsppr_", "guarpr_")
anes$issues.3 <- rescale01(Reduce(`+`, lapply(three_issues, issue_gap)))
time.issue <- anes[, c("caseid", "issues.3")]
#write.csv(time.issue,"timeIssue12.csv",row.names=F)
anes$issues.3 <- NULL

#candidate personal quality measure, built from the open-ended comment counts
anes$personal <- rescale01(
  anes$rep.like + anes$dem.dislike - anes$rep.dislike - anes$dem.like
)

#Emotion items: each emotion stem (ang, hp, afr, prd) exists for both
#candidate suffixes (dpc, rpc) as a yes/no item plus an "...oft" frequency item.
emotion_stems <- c("ang", "hp", "afr", "prd")

for (cand in c("dpc", "rpc")) {
  for (emo in emotion_stems) {
    felt_var <- paste0("candaff_", emo, cand)
    often_var <- paste0(felt_var, "oft")

    #recode anxiety variables for those saying "no" to an emotion
    anes[[often_var]][anes[[felt_var]] == 2] <- 5

    #reverse coding so that higher values mean more of the emotion,
    #then shift and divide so the 1-5 codes run from 0 to 1
    flipped <- recode(anes[[often_var]], '1=5;2=4;3=3;4=2;5=1')
    anes[[often_var]] <- (flipped - 1) / 4
  }
}

#candidate own emotions: use the dpc items for respondents flagged as
#democrat (rescaled pid_x below .5), the rpc items for everyone else
anes$democrat <- as.numeric(anes$pid_x < .5)
use_dpc <- anes$democrat == 1
for (emo in emotion_stems) {
  anes[[paste0(emo, ".own")]] <- ifelse(
    use_dpc,
    anes[[paste0("candaff_", emo, "dpcoft")]],
    anes[[paste0("candaff_", emo, "rpcoft")]]
  )
}


###Write Data###
#Keep only the analysis variables, drop every row with a missing value,
#inspect the result and save it.
analysis_vars <- c("caseid", "vote.rep", "pid_x", "issues", "personal",
                   "ang.own", "hp.own", "afr.own", "prd.own")
anes <- na.omit(subset(anes, select = analysis_vars))
summary(anes)
dim(anes)
write.table(anes, file = "anes12.txt", row.names = FALSE)



